# code.r
# This file provides code to replicate analyses in the main text and supplemental materials
# of Bailey, Collins, Rhodes, and Rice (2023)
#
# dr 12.21.2023
# =-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

# front-end matters
library(glue)
library(quanteda)
library(stm)
library(tm)
library(ggplot2)
library(readtext)
library(dplyr)
library(devtools)
library(quanteda.dictionaries)
library(quanteda.sentiment)

# set seed	
set.seed(1234)

# load workspace
load("replication_workspace.RData")

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Figure 1: Plot of Articles Over time
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

myCounts <- as.data.frame(table(textdata$year))
colnames(myCounts) <- c("myYear", "count")
myCounts$myYear <- as.numeric(as.character(myCounts$myYear))
myYear <- as.data.frame(seq(from=min(myCounts$myYear), to=max(myCounts$myYear), by=1))
colnames(myYear) <- "myYear"
myCounts <- merge(myCounts, myYear, by="myYear", all.y=T)
myCounts$count[which(is.na(myCounts$count))] <- 0
 
theme_set(theme_bw())
ggplot(myCounts, aes(x=myYear, y=count)) + 
	geom_point(size=3) +
	geom_segment(aes(x=myYear, xend=myYear, y=0, yend=count)) + 
	labs(title = "Count of Articles By Year") +
	xlab("Year") +
  ylab("Count") +
  xlim(1997,2009) +
	theme(axis.text.x = element_text(angle = 65, vjust=0.60)) 

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Structural Topic Model of Issue Attention
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-



# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Figure 2: Plot of Topic Prevalence Effects
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

# subset to the two topics we want for plotting
plotData %>% 
  filter(Topic %in% c("marriage_marry_massachusetts_marriages", 
                      "sodomy_lawrence_custody_adoption")) %>%
ggplot() + 
  geom_point(aes(x=Estimate,y= as.factor(Covariate))) +
  geom_segment(aes(x=Lower, xend=Upper, y=as.factor(Covariate), yend=as.factor(Covariate))) +
  ylab("") + 
  geom_vline(xintercept=0, color = "darkgrey") +
  facet_wrap(~Topic, labeller = labeller(Topic = new.labs)) 

#=-=-=-=-=-=-=-=-=-=--
# Sentiment Analysis
#=-=-=-=-=-=-=-=-=-=-

# calculate Lexicoder sentiment
tmpdfm <- as.data.frame(dfm_lookup(corpusAsDfm, dictionary = data_dictionary_LSD2015))
sentimentDfm$polarity_lsd <- ((tmpdfm$positive + tmpdfm$neg_negative) - (tmpdfm$negative+tmpdfm$neg_positive))/((tmpdfm$positive + tmpdfm$neg_negative) + (tmpdfm$negative+tmpdfm$neg_positive))

# calculate NRC sentiment
tmpdfm <- as.data.frame(dfm_lookup(corpusAsDfm, dictionary = data_dictionary_NRC))
sentimentDfm$polarity_nrc <- (tmpdfm$positive - tmpdfm$negative)/(tmpdfm$positive + tmpdfm$negative)

# calculate General Inquirer sentiment
tmpdfm <- as.data.frame(dfm_lookup(corpusAsDfm, dictionary = data_dictionary_geninqposneg))
sentimentDfm$polarity_geninq <- (tmpdfm$positive - tmpdfm$negative)/(tmpdfm$positive + tmpdfm$negative)

# calculate Ensemble sentiment
sentimentDfm$polarity_ensemble <- (sentimentDfm$polarity_lsd + sentimentDfm$polarity_nrc + sentimentDfm$polarity_geninq)/3

# calculate models
m_ensemble <- summary(lm(polarity_ensemble ~ lawrence + goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_geninq <- summary(lm(polarity_geninq ~ lawrence + goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_lsd <- summary(lm(polarity_lsd ~ lawrence + goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_nrc <- summary(lm(polarity_nrc ~ lawrence + goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))

# table to store results
m_tuples <- matrix(NA, 12, 5)
m_tuples[,1] <- c(rep("Lexicoder", 3), rep("NRC", 3), rep("General Inquirer", 3), rep("Ensemble", 3))
m_tuples[,2] <- c(rep(c("Lawrence", "Goodridge", "Lofton"), 4))

# pull results
m_tuples[1:3,4] <- as.numeric(m_lsd$coefficients[2:4,1]) 
m_tuples[1:3,3] <- as.numeric(m_lsd$coefficients[2:4,1]) - (1.65 * as.numeric(m_lsd$coefficients[2:4,2]))
m_tuples[1:3,5] <- as.numeric(m_lsd$coefficients[2:4,1]) + (1.65 * as.numeric(m_lsd$coefficients[2:4,2]))

m_tuples[4:6,4] <- as.numeric(m_nrc$coefficients[2:4,1]) 
m_tuples[4:6,3] <- as.numeric(m_nrc$coefficients[2:4,1]) - (1.65 * as.numeric(m_nrc$coefficients[2:4,2]))
m_tuples[4:6,5] <- as.numeric(m_nrc$coefficients[2:4,1]) + (1.65 * as.numeric(m_nrc$coefficients[2:4,2]))

m_tuples[7:9,4] <- as.numeric(m_geninq$coefficients[2:4,1]) 
m_tuples[7:9,3] <- as.numeric(m_geninq$coefficients[2:4,1]) - (1.65 * as.numeric(m_geninq$coefficients[2:4,2]))
m_tuples[7:9,5] <- as.numeric(m_geninq$coefficients[2:4,1]) + (1.65 * as.numeric(m_geninq$coefficients[2:4,2]))

m_tuples[10:12,4] <- as.numeric(m_ensemble$coefficients[2:4,1]) 
m_tuples[10:12,3] <- as.numeric(m_ensemble$coefficients[2:4,1]) - (1.65 * as.numeric(m_ensemble$coefficients[2:4,2]))
m_tuples[10:12,5] <- as.numeric(m_ensemble$coefficients[2:4,1]) + (1.65 * as.numeric(m_ensemble$coefficients[2:4,2]))

# store as dataframe for plotting
plotData <- as.data.frame(m_tuples)
colnames(plotData) <- c("Dictionary", "Covariate", "Lower", "Estimate", "Upper")
plotData$Estimate <- as.numeric(as.character(plotData$Estimate))
plotData$Lower <- as.numeric(as.character(plotData$Lower))
plotData$Upper <- as.numeric(as.character(plotData$Upper))
plotData$Covariate <- as.factor(plotData$Covariate)
plotData$Covariate <- ordered(plotData$Covariate, levels = c("Lawrence", "Goodridge", "Lofton"))

# Figure 3: plot of coefficients from models of article polarity
plotData %>%
 ggplot() + 
  geom_point(aes(x=Estimate,y= Dictionary)) +
  geom_segment(aes(x=Lower, xend=Upper, y=Dictionary, yend=Dictionary)) +
  geom_vline(xintercept=0, color = "darkgrey") +
  xlim(-.31,.31) +
  facet_wrap(~Covariate, ncol = 1) 

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Appendix
# =-=-=-=-=-=-=-=-=-=--=-=-=-=-=-

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Figure A1: Diagnostic Plot for STM Models
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--==-=-

# check different structural topic models
myModels <- searchK(mydfm, K=c(6,12,18), prevalence =~ lawrence + goodridge + lofton + erie + us + rainbow + tapestry + s(dateCount, 5),  N=150, data= textdata, max.em.its = 1000)
plot(myModels)

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Figure A2: Scatterplot of Topic Quality
# =-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-

# estimate 2 then look at topicQuality to see which has best coherence/exclusivity
m12 <- stm(mydfm, K=12, prevalence =~ lawrence + goodridge + lofton + erie + us + rainbow + tapestry + s(dateCount, 5), data=textdata, max.em.its = 1000, seed=1234, init.type = "Spectral")
m18 <- stm(mydfm, K=18, prevalence =~ lawrence + goodridge + lofton + erie + us + rainbow + tapestry + s(dateCount, 5), data=textdata, max.em.its = 1000, seed=1234, init.type = "Spectral" )

# create data to compare topic qualities
m12plot <- as.data.frame(cbind(c(1:12),
                               exclusivity(m12),
                               semanticCoherence(model=m12, mydfm), 
                               "12 Topics"))
m18plot <- as.data.frame(cbind(c(1:18),
                               exclusivity(m18),
                               semanticCoherence(model=m18, mydfm), 
                               "18 Topics"))
plotData <- rbind(m12plot,m18plot)
colnames(plotData) <- c("K", "Exclusivity", "Semantic_Coherence", "Model")
plotData$Exclusivity <- as.numeric(as.character(plotData$Exclusivity))
plotData$Semantic_Coherence <- as.numeric(as.character(plotData$Semantic_Coherence))                        

# plot data to compare topic qualities
ggplot(plotData, aes(Semantic_Coherence, Exclusivity, shape = Model)) + 
  geom_point(size = 3, alpha = 0.5) +
  geom_text(aes(label = K), nudge_x = .05, nudge_y = .05) +
  labs(x="Semantic Coherence", y = "Exclusivity", main = "Plot of Topic Quality Measures")

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Figure A3: Plot of Prevalence Across all Topics from STM 
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

# plot effects
tuples <- matrix(NA, k*3,5)
# Lawrence results
for (i in 1:k){
  tmpModel <- summary(mAllEffects)[3][[1]][[i]]
  tuples[i,1] <- myTopicLabels[i]
  tuples[i,2] <- "Lawrence"
  estimate <- summary(mAllEffects)[3][[1]][[i]][,1][2]
  se <- summary(mAllEffects)[3][[1]][[i]][,2][2]
  tuples[i,3] <- estimate
  tuples[i,4] <- estimate - (1.65 * se)
  tuples[i,5] <- estimate + (1.65 * se)
}
# Goodridge results
for (i in 1:k){
  tmpModel <- summary(mAllEffects)[3][[1]][[i]]
  tuples[i+k,1] <- myTopicLabels[i]
  tuples[i+k,2] <- "Goodridge"
  estimate <- summary(mAllEffects)[3][[1]][[i]][,1][3]
  se <- summary(mAllEffects)[3][[1]][[i]][,2][3]
  tuples[i+k,3] <- estimate
  tuples[i+k,4] <- estimate - (1.65 * se)
  tuples[i+k,5] <- estimate + (1.65 * se)
}
# Lofton results
for (i in 1:k){
  tmpModel <- summary(mAllEffects)[3][[1]][[i]]
  tuples[i+(k*2),1] <- myTopicLabels[i]
  tuples[i+(k*2),2] <- "Lofton"
  estimate <- summary(mAllEffects)[3][[1]][[i]][,1][4]
  se <- summary(mAllEffects)[3][[1]][[i]][,2][4]
  tuples[i+(k*2),3] <- estimate
  tuples[i+(k*2),4] <- estimate - (1.65 * se)
  tuples[i+(k*2),5] <- estimate + (1.65 * se)
}

# store as dataframe for plotting
plotData <- as.data.frame(tuples)
colnames(plotData) <- c("Topic", "Covariate", "Estimate", "Lower", "Upper")
plotData$Estimate <- as.numeric(as.character(plotData$Estimate))
plotData$Lower <- as.numeric(as.character(plotData$Lower))
plotData$Upper <- as.numeric(as.character(plotData$Upper))
plotData$Covariate <- as.factor(plotData$Covariate)
plotData$Covariate <- ordered(plotData$Covariate, levels = c("Lofton", "Goodridge", "Lawrence"))

# plot all of these
ggplot(plotData) + 
  geom_point(aes(x=Estimate,y= Covariate)) +
  geom_segment(aes(x=Lower, xend=Upper, y=Covariate, yend=Covariate)) +
  geom_vline(xintercept=0, color = "darkgrey") +
  facet_wrap(~Topic) 

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Table A1: Coefficient Estimates from STM
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

# same-sex marriage
summary(mAllEffects)[3][[1]][10]

# domestic relations
summary(mAllEffects)[3][[1]][12]

stargazer(summary(mAllEffects)[3][[1]][10], summary(mAllEffects)[3][[1]][12])


# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Table A2: Full Results for Models of Polarity
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

m_ensemble
m_geninq
m_lsd
m_nrc

# =-=-=-=-=-=-=-=-=-=-=
# Figure A4: T-Tests
# =-=-=-=-=-=-=-=-=-=-=

# lawrence before & after
t1 <- t.test(sentimentDfm$polarity_ensemble[which(sentimentDfm$goodridge == 0)] ~ sentimentDfm$lawrence[which(sentimentDfm$goodridge == 0)])
t2 <- t.test(sentimentDfm$polarity_nrc[which(sentimentDfm$goodridge == 0)] ~ sentimentDfm$lawrence[which(sentimentDfm$goodridge == 0)])
t3 <- t.test(sentimentDfm$polarity_geninq[which(sentimentDfm$goodridge == 0)] ~ sentimentDfm$lawrence[which(sentimentDfm$goodridge == 0)])
t4 <- t.test(sentimentDfm$polarity_lsd[which(sentimentDfm$goodridge == 0)] ~ sentimentDfm$lawrence[which(sentimentDfm$goodridge == 0)])

tdf <- as.data.frame(rbind(t1$conf.int[1:2], t2$conf.int[1:2], t3$conf.int[1:2], t4$conf.int[1:2]))
colnames(tdf) <- c("Lower", "Upper")
tdf <- tdf * -1
tdf$DV <- c("Ensemble", "NRC", "Gen. Inq.", "Lexicoder")

pdf("figA4-top.pdf", height=2, width=5)
ggplot(tdf) +
  geom_segment(aes(x=Lower, xend=Upper, y = DV, yend = DV)) + 
  xlab("More Negative After          ...         More Positive After") +
  ylab ("") +
  geom_vline(xintercept = 0, color = "darkgrey", linetype = "dashed") + 
  xlim(-.2,.2) +
  ggtitle(label = "Lawrence v. Texas") +
  theme_bw()
dev.off()

# goodridge before & after
t1 <- t.test(sentimentDfm$polarity_ensemble[which(sentimentDfm$lawrence == 1 & sentimentDfm$lofton == 0)] ~ sentimentDfm$goodridge[which(sentimentDfm$lawrence == 1 & sentimentDfm$lofton == 0)])
t2 <- t.test(sentimentDfm$polarity_nrc[which(sentimentDfm$lawrence == 1 & sentimentDfm$lofton == 0)] ~ sentimentDfm$goodridge[which(sentimentDfm$lawrence == 1 & sentimentDfm$lofton == 0)])
t3 <- t.test(sentimentDfm$polarity_geninq[which(sentimentDfm$lawrence == 1 & sentimentDfm$lofton == 0)] ~ sentimentDfm$goodridge[which(sentimentDfm$lawrence == 1 & sentimentDfm$lofton == 0)])
t4 <- t.test(sentimentDfm$polarity_lsd[which(sentimentDfm$lawrence == 1 & sentimentDfm$lofton == 0)] ~ sentimentDfm$goodridge[which(sentimentDfm$lawrence == 1 & sentimentDfm$lofton == 0)])

tdf <- as.data.frame(rbind(t1$conf.int[1:2], t2$conf.int[1:2], t3$conf.int[1:2], t4$conf.int[1:2]))
colnames(tdf) <- c("Lower", "Upper")
tdf <- tdf * -1
tdf$DV <- c("Ensemble", "NRC", "Gen. Inq.", "Lexicoder")

pdf("figA4-middle.pdf", height=2, width=5)
ggplot(tdf) +
  geom_segment(aes(x=Lower, xend=Upper, y = DV, yend = DV)) + 
  xlab("More Negative After          ...         More Positive After") +
  ylab ("") +
  geom_vline(xintercept = 0, color = "darkgrey", linetype = "dashed") + 
  xlim(-.15,.15) +
  ggtitle(label = "Goodridge v. Department of Public Health") +
  theme_bw()
dev.off()

# lofton before & after
t1 <- t.test(sentimentDfm$polarity_ensemble[which(sentimentDfm$goodridge== 1)] ~ sentimentDfm$lofton[which(sentimentDfm$goodridge == 1)])
t2 <- t.test(sentimentDfm$polarity_nrc[which(sentimentDfm$goodridge== 1)] ~ sentimentDfm$lofton[which(sentimentDfm$goodridge == 1)])
t3 <- t.test(sentimentDfm$polarity_geninq[which(sentimentDfm$goodridge== 1)] ~ sentimentDfm$lofton[which(sentimentDfm$goodridge == 1)])
t4 <- t.test(sentimentDfm$polarity_lsd[which(sentimentDfm$goodridge== 1)] ~ sentimentDfm$lofton[which(sentimentDfm$goodridge == 1)])

tdf <- as.data.frame(rbind(t1$conf.int[1:2], t2$conf.int[1:2], t3$conf.int[1:2], t4$conf.int[1:2]))
tdf <- tdf * -1
colnames(tdf) <- c("Lower", "Upper")
tdf$DV <- c("Ensemble", "NRC", "Gen. Inq.", "Lexicoder")

pdf("figA4-bottom.pdf", height=2, width=5)
ggplot(tdf) +
  geom_segment(aes(x=Lower, xend=Upper, y = DV, yend = DV)) + 
  xlab("More Negative After          ...         More Positive After") +
  ylab ("") +
  geom_vline(xintercept = 0, color = "darkgrey", linetype = "dashed") + 
  xlim(-.15,.15) +
  ggtitle(label = "Lofton v. Dep't of Children & Family Servs.") +
  theme_bw()
dev.off()

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Figures A5, A6, and A7: Randomization Tests 
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

# ------
# For efficiency's sake, we estimate the models sequentially for the different
# dependent variables. We start with the Ensemble Model
# ------

# pull actual values
lawrenceT <-  m_ensemble$coefficients[2,3]
goodridgeT <- m_ensemble$coefficients[3,3]
loftonT <- m_ensemble$coefficients[4,3]

# pull proportions
lawrenceProp <- as.numeric(table(sentimentDfm$lawrence)[2]/nrow(sentimentDfm))
goodridgeProp <- as.numeric(table(sentimentDfm$goodridge)[2]/nrow(sentimentDfm))
loftonProp <- as.numeric(table(sentimentDfm$lofton)[2]/nrow(sentimentDfm))

# -------------
# Lawrence Loop
# -------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)
for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)

  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)

  # models
  tmpModel <- summary(lm(polarity_ensemble ~ larry + goody + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

# plot
pdf("figA5_UpperLeft.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Lawrence), bins=100, alpha=0.5) +
  geom_vline(xintercept = lawrenceT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,1]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Lawrence v. Texas") + 
  theme_bw()
dev.off()

# ---------------
# Goodridge loop
# ---------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)

for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)
  
  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)
  
  # models
  tmpModel <- summary(lm(polarity_ensemble ~ lawrence + goody + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

# plot
pdf("figA6_UpperLeft.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Goodridge), bins=100, alpha=0.5) +
  geom_vline(xintercept = goodridgeT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,2]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Goodridge v. Department of Public Health") + 
  theme_bw()
dev.off()


# ---------------
# Lofton loop
# ---------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)

for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)
  
  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)
  
  # models
  tmpModel <- summary(lm(polarity_ensemble ~ lawrence + goodridge + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

pdf("figA7_UpperLeft.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Lofton), bins=100, alpha=0.5) +
  geom_vline(xintercept = loftonT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,3]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Lofton v. Dep't of Children & Family Servs.") + 
  theme_bw()
dev.off()

# ------ 
# NRC Model
# ------

# pull actual values
lawrenceT <-  m_nrc$coefficients[2,3]
goodridgeT <- m_nrc$coefficients[3,3]
loftonT <- m_nrc$coefficients[4,3]

# pull proportions
lawrenceProp <- as.numeric(table(sentimentDfm$lawrence)[2]/nrow(sentimentDfm))
goodridgeProp <- as.numeric(table(sentimentDfm$goodridge)[2]/nrow(sentimentDfm))
loftonProp <- as.numeric(table(sentimentDfm$lofton)[2]/nrow(sentimentDfm))

# -------------
# Lawrence Loop
# -------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)
for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)
  
  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)
  
  # models
  tmpModel <- summary(lm(polarity_nrc ~ larry + goody + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

# plot
pdf("figA5_LowerRight.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Lawrence), bins=100, alpha=0.5) +
  geom_vline(xintercept = lawrenceT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,1]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Lawrence v. Texas") + 
  theme_bw()
dev.off()


# ---------------
# Goodridge loop
# ---------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)

for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)
  
  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)
  
  # models
  tmpModel <- summary(lm(polarity_nrc ~ lawrence + goody + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

# plot
pdf("figA6_LowerRight.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Goodridge), bins=100, alpha=0.5) +
  geom_vline(xintercept = goodridgeT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,2]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Goodridge v. Department of Public Health") + 
  theme_bw()
dev.off()

# ---------------
# Lofton loop
# ---------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)

for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)
  
  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)
  
  # models
  tmpModel <- summary(lm(polarity_nrc ~ lawrence + goodridge + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

pdf("figA7_LowerRight.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Lofton), bins=100, alpha=0.5) +
  geom_vline(xintercept = loftonT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,3]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Lofton v. Dep't of Children & Family Servs.") + 
  theme_bw()
dev.off()


# ------ 
# LSD Model
# ------

# pull actual values
lawrenceT <-  m_lsd$coefficients[2,3]
goodridgeT <- m_lsd$coefficients[3,3]
loftonT <- m_lsd$coefficients[4,3]

# pull proportions
lawrenceProp <- as.numeric(table(sentimentDfm$lawrence)[2]/nrow(sentimentDfm))
goodridgeProp <- as.numeric(table(sentimentDfm$goodridge)[2]/nrow(sentimentDfm))
loftonProp <- as.numeric(table(sentimentDfm$lofton)[2]/nrow(sentimentDfm))

# -------------
# Lawrence Loop
# -------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)
for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)
  
  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)
  
  # models
  tmpModel <- summary(lm(polarity_lsd ~ larry + goody + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

# plot
pdf("figA5_LowerLeft.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Lawrence), bins=100, alpha=0.5) +
  geom_vline(xintercept = lawrenceT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,1]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Lawrence v. Texas") + 
  theme_bw()
dev.off()


# ---------------
# Goodridge loop
# ---------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)

for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)
  
  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)
  
  # models
  tmpModel <- summary(lm(polarity_lsd ~ lawrence + goody + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

# plot
pdf("figA6_LowerLeft.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Goodridge), bins=100, alpha=0.5) +
  geom_vline(xintercept = goodridgeT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,2]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Goodridge v. Department of Public Health") + 
  theme_bw()
dev.off()

# ---------------
# Lofton loop
# ---------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)

for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)
  
  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)
  
  # models
  tmpModel <- summary(lm(polarity_lsd ~ lawrence + goodridge + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

pdf("figA7_LowerLeft.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Lofton), bins=100, alpha=0.5) +
  geom_vline(xintercept = loftonT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,3]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Lofton v. Dep't of Children & Family Servs.") + 
  theme_bw()
dev.off()

# ------ 
# GenInq Model
# ------

# pull actual values
lawrenceT <-  m_geninq$coefficients[2,3]
goodridgeT <- m_geninq$coefficients[3,3]
loftonT <- m_geninq$coefficients[4,3]

# pull proportions
lawrenceProp <- as.numeric(table(sentimentDfm$lawrence)[2]/nrow(sentimentDfm))
goodridgeProp <- as.numeric(table(sentimentDfm$goodridge)[2]/nrow(sentimentDfm))
loftonProp <- as.numeric(table(sentimentDfm$lofton)[2]/nrow(sentimentDfm))

# -------------
# Lawrence Loop
# -------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)
for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)
  
  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)
  
  # models
  tmpModel <- summary(lm(polarity_geninq ~ larry + goody + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

# plot
pdf("figA5_UpperRight.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Lawrence), bins=100, alpha=0.5) +
  geom_vline(xintercept = lawrenceT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,1]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Lawrence v. Texas") + 
  theme_bw()
dev.off()


# ---------------
# Goodridge loop
# ---------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)

for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)
  
  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)
  
  # models
  tmpModel <- summary(lm(polarity_geninq ~ lawrence + goody + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

# plot
pdf("figA6_UpperRight.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Goodridge), bins=100, alpha=0.5) +
  geom_vline(xintercept = goodridgeT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,2]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Goodridge v. Department of Public Health") + 
  theme_bw()
dev.off()

# ---------------
# Lofton loop
# ---------------

# create vector to store results
testStats <- matrix(NA,1000,3)

# loop through randomizations
set.seed(90210)

for (i in 1:nrow(testStats)){
  # create tmp data
  tmp <- sentimentDfm
  
  # create random lawrence
  tmp$larry <- rbinom(nrow(tmp), 1, lawrenceProp)
  
  # create random goodridge
  tmp$goody <- rbinom(nrow(tmp), 1, goodridgeProp)
  
  # create random lofton
  tmp$lofty <- rbinom(nrow(tmp), 1, loftonProp)
  
  # models
  tmpModel <- summary(lm(polarity_geninq ~ lawrence + goodridge + lofty + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = tmp))
  
  # extract estimates
  testStats[i,] <- as.numeric(tmpModel$coefficients[2:4,3])
}

# format data
testStats <- as.data.frame(testStats)
colnames(testStats) <- c("Lawrence", "Goodridge", "Lofton")

pdf("figA7_UpperRight.pdf", width=4.5, height=3)
ggplot(testStats) +
  geom_histogram(aes(x=Lofton), bins=100, alpha=0.5) +
  geom_vline(xintercept = loftonT, linetype="dashed", size = 1.5) +
  geom_vline(xintercept = rev(sort(testStats[,3]))[50], linetype = "solid", size = 1.5) +
  #  xlim(-.1,.1) +
  xlab("T-Value") +
  ylab("Count") + 
  ggtitle("Lofton v. Dep't of Children & Family Servs.") + 
  theme_bw()
dev.off()

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Sequential Models
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Figure A8 and Table A3: Ensemble Sequential Models
# =-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

# ensemble sequence
m_1 <- summary(lm(polarity_ensemble ~ lawrence + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_2 <- summary(lm(polarity_ensemble ~ lawrence + goodridge + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_3 <- summary(lm(polarity_ensemble ~ lawrence + goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_4 <- summary(lm(polarity_ensemble ~ goodridge + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_5 <- summary(lm(polarity_ensemble ~ goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_6 <- summary(lm(polarity_ensemble ~ lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))

# store results
m_tuples <- matrix(NA, 10,6)
m_tuples[1,2] <- as.numeric(m_1$coefficients[2,1]) 
m_tuples[1,1] <- as.numeric(m_1$coefficients[2,1]) - (1.65 * as.numeric(m_1$coefficients[2,2]))
m_tuples[1,3] <- as.numeric(m_1$coefficients[2,1]) + (1.65 * as.numeric(m_1$coefficients[2,2]))
m_tuples[2,2] <- as.numeric(m_2$coefficients[2,1]) 
m_tuples[2,1] <- as.numeric(m_2$coefficients[2,1]) - (1.65 * as.numeric(m_2$coefficients[2,2]))
m_tuples[2,3] <- as.numeric(m_2$coefficients[2,1]) + (1.65 * as.numeric(m_2$coefficients[2,2]))
m_tuples[3,2] <- as.numeric(m_3$coefficients[2,1]) 
m_tuples[3,1] <- as.numeric(m_3$coefficients[2,1]) - (1.65 * as.numeric(m_3$coefficients[2,2]))
m_tuples[3,3] <- as.numeric(m_3$coefficients[2,1]) + (1.65 * as.numeric(m_3$coefficients[2,2]))
m_tuples[4,2] <- as.numeric(m_2$coefficients[3,1]) 
m_tuples[4,1] <- as.numeric(m_2$coefficients[3,1]) - (1.65 * as.numeric(m_2$coefficients[3,2]))
m_tuples[4,3] <- as.numeric(m_2$coefficients[3,1]) + (1.65 * as.numeric(m_2$coefficients[3,2]))
m_tuples[5,2] <- as.numeric(m_3$coefficients[3,1]) 
m_tuples[5,1] <- as.numeric(m_3$coefficients[3,1]) - (1.65 * as.numeric(m_3$coefficients[3,2]))
m_tuples[5,3] <- as.numeric(m_3$coefficients[3,1]) + (1.65 * as.numeric(m_3$coefficients[3,2]))
m_tuples[6,2] <- as.numeric(m_3$coefficients[4,1]) 
m_tuples[6,1] <- as.numeric(m_3$coefficients[4,1]) - (1.65 * as.numeric(m_3$coefficients[4,2]))
m_tuples[6,3] <- as.numeric(m_3$coefficients[4,1]) + (1.65 * as.numeric(m_3$coefficients[4,2]))
m_tuples[7,2] <- as.numeric(m_4$coefficients[2,1]) 
m_tuples[7,1] <- as.numeric(m_4$coefficients[2,1]) - (1.65 * as.numeric(m_4$coefficients[2,2]))
m_tuples[7,3] <- as.numeric(m_4$coefficients[2,1]) + (1.65 * as.numeric(m_4$coefficients[2,2]))
m_tuples[8,2] <- as.numeric(m_5$coefficients[2,1]) 
m_tuples[8,1] <- as.numeric(m_5$coefficients[2,1]) - (1.65 * as.numeric(m_5$coefficients[2,2]))
m_tuples[8,3] <- as.numeric(m_5$coefficients[2,1]) + (1.65 * as.numeric(m_5$coefficients[2,2]))
m_tuples[9,2] <- as.numeric(m_5$coefficients[3,1]) 
m_tuples[9,1] <- as.numeric(m_5$coefficients[3,1]) - (1.65 * as.numeric(m_5$coefficients[3,2]))
m_tuples[9,3] <- as.numeric(m_5$coefficients[3,1]) + (1.65 * as.numeric(m_5$coefficients[3,2]))
m_tuples[10,2] <- as.numeric(m_6$coefficients[2,1]) 
m_tuples[10,1] <- as.numeric(m_6$coefficients[2,1]) - (1.65 * as.numeric(m_6$coefficients[2,2]))
m_tuples[10,3] <- as.numeric(m_6$coefficients[2,1]) + (1.65 * as.numeric(m_6$coefficients[2,2]))


m_tuples[,4] <- "Ensemble"
m_tuples[,5] <- c("Lawrence", "Lawrence + Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge + Lofton", "Goodridge", "Goodridge + Lofton", "Goodridge + Lofton", "Lofton")
m_tuples[,6] <- c("Lawrence", "Lawrence", "Lawrence", "Goodridge", "Goodridge", "Lofton", "Goodridge", "Goodridge", "Lofton", "Lofton")


# store as dataframe for plotting
plotData <- as.data.frame(m_tuples)
colnames(plotData) <- c("Lower", "Estimate", "Upper", "Dictionary", "Model", "Covariate")
plotData$Estimate <- as.numeric(as.character(plotData$Estimate))
plotData$Lower <- as.numeric(as.character(plotData$Lower))
plotData$Upper <- as.numeric(as.character(plotData$Upper))
plotData$Model <- as.factor(plotData$Model)
plotData$Model <- ordered(plotData$Model, levels = c("Lofton", "Goodridge + Lofton", "Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge", "Lawrence"))
plotData$Covariate <- as.factor(plotData$Covariate)
plotData$Covariate <- ordered(plotData$Covariate, levels = c("Lawrence", "Goodridge", "Lofton"))

# plot all of these
pdf("figA8_seqmodels_ensemble.pdf", height=6, width=4)
plotData %>% 
  ggplot() + 
  geom_point(aes(x=Estimate,y=Model)) +
  geom_segment(aes(x=Lower, xend=Upper, y=Model, yend=Model)) +
  geom_vline(xintercept=0, color = "darkgrey") +
  xlim(-.31,.31) +
  facet_wrap(~Covariate, ncol = 1) +
  theme_bw()
dev.off()

# Table A3
m_1
m_2
m_3
m_4
m_5
m_6

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Figure A9 and Table A4: Gen. Inq. Sequential Models
# =-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-


# gen. inq. sequence
m_1 <- summary(lm(polarity_geninq ~ lawrence + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_2 <- summary(lm(polarity_geninq ~ lawrence + goodridge + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_3 <- summary(lm(polarity_geninq ~ lawrence + goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_4 <- summary(lm(polarity_geninq ~ goodridge + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_5 <- summary(lm(polarity_geninq ~ goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_6 <- summary(lm(polarity_geninq ~ lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))

# store results
m_tuples <- matrix(NA, 10,6)
m_tuples[1,2] <- as.numeric(m_1$coefficients[2,1]) 
m_tuples[1,1] <- as.numeric(m_1$coefficients[2,1]) - (1.65 * as.numeric(m_1$coefficients[2,2]))
m_tuples[1,3] <- as.numeric(m_1$coefficients[2,1]) + (1.65 * as.numeric(m_1$coefficients[2,2]))
m_tuples[2,2] <- as.numeric(m_2$coefficients[2,1]) 
m_tuples[2,1] <- as.numeric(m_2$coefficients[2,1]) - (1.65 * as.numeric(m_2$coefficients[2,2]))
m_tuples[2,3] <- as.numeric(m_2$coefficients[2,1]) + (1.65 * as.numeric(m_2$coefficients[2,2]))
m_tuples[3,2] <- as.numeric(m_3$coefficients[2,1]) 
m_tuples[3,1] <- as.numeric(m_3$coefficients[2,1]) - (1.65 * as.numeric(m_3$coefficients[2,2]))
m_tuples[3,3] <- as.numeric(m_3$coefficients[2,1]) + (1.65 * as.numeric(m_3$coefficients[2,2]))
m_tuples[4,2] <- as.numeric(m_2$coefficients[3,1]) 
m_tuples[4,1] <- as.numeric(m_2$coefficients[3,1]) - (1.65 * as.numeric(m_2$coefficients[3,2]))
m_tuples[4,3] <- as.numeric(m_2$coefficients[3,1]) + (1.65 * as.numeric(m_2$coefficients[3,2]))
m_tuples[5,2] <- as.numeric(m_3$coefficients[3,1]) 
m_tuples[5,1] <- as.numeric(m_3$coefficients[3,1]) - (1.65 * as.numeric(m_3$coefficients[3,2]))
m_tuples[5,3] <- as.numeric(m_3$coefficients[3,1]) + (1.65 * as.numeric(m_3$coefficients[3,2]))
m_tuples[6,2] <- as.numeric(m_3$coefficients[4,1]) 
m_tuples[6,1] <- as.numeric(m_3$coefficients[4,1]) - (1.65 * as.numeric(m_3$coefficients[4,2]))
m_tuples[6,3] <- as.numeric(m_3$coefficients[4,1]) + (1.65 * as.numeric(m_3$coefficients[4,2]))
m_tuples[7,2] <- as.numeric(m_4$coefficients[2,1]) 
m_tuples[7,1] <- as.numeric(m_4$coefficients[2,1]) - (1.65 * as.numeric(m_4$coefficients[2,2]))
m_tuples[7,3] <- as.numeric(m_4$coefficients[2,1]) + (1.65 * as.numeric(m_4$coefficients[2,2]))
m_tuples[8,2] <- as.numeric(m_5$coefficients[2,1]) 
m_tuples[8,1] <- as.numeric(m_5$coefficients[2,1]) - (1.65 * as.numeric(m_5$coefficients[2,2]))
m_tuples[8,3] <- as.numeric(m_5$coefficients[2,1]) + (1.65 * as.numeric(m_5$coefficients[2,2]))
m_tuples[9,2] <- as.numeric(m_5$coefficients[3,1]) 
m_tuples[9,1] <- as.numeric(m_5$coefficients[3,1]) - (1.65 * as.numeric(m_5$coefficients[3,2]))
m_tuples[9,3] <- as.numeric(m_5$coefficients[3,1]) + (1.65 * as.numeric(m_5$coefficients[3,2]))
m_tuples[10,2] <- as.numeric(m_6$coefficients[2,1]) 
m_tuples[10,1] <- as.numeric(m_6$coefficients[2,1]) - (1.65 * as.numeric(m_6$coefficients[2,2]))
m_tuples[10,3] <- as.numeric(m_6$coefficients[2,1]) + (1.65 * as.numeric(m_6$coefficients[2,2]))


m_tuples[,4] <- "GenInq"
m_tuples[,5] <- c("Lawrence", "Lawrence + Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge + Lofton", "Goodridge", "Goodridge + Lofton", "Goodridge + Lofton", "Lofton")
m_tuples[,6] <- c("Lawrence", "Lawrence", "Lawrence", "Goodridge", "Goodridge", "Lofton", "Goodridge", "Goodridge", "Lofton", "Lofton")


# store as dataframe for plotting
plotData <- as.data.frame(m_tuples)
colnames(plotData) <- c("Lower", "Estimate", "Upper", "Dictionary", "Model", "Covariate")
plotData$Estimate <- as.numeric(as.character(plotData$Estimate))
plotData$Lower <- as.numeric(as.character(plotData$Lower))
plotData$Upper <- as.numeric(as.character(plotData$Upper))
plotData$Model <- as.factor(plotData$Model)
plotData$Model <- ordered(plotData$Model, levels = c("Lofton", "Goodridge + Lofton", "Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge", "Lawrence"))
plotData$Covariate <- as.factor(plotData$Covariate)
plotData$Covariate <- ordered(plotData$Covariate, levels = c("Lawrence", "Goodridge", "Lofton"))

# plot all of these
pdf("figA9_seqmodels_geninq.pdf", height=6, width=4)
plotData %>% 
  ggplot() + 
  geom_point(aes(x=Estimate,y=Model)) +
  geom_segment(aes(x=Lower, xend=Upper, y=Model, yend=Model)) +
  geom_vline(xintercept=0, color = "darkgrey") +
  xlim(-.31,.31) +
  facet_wrap(~Covariate, ncol = 1) +
  theme_bw()
dev.off()

# Table A4
m_1
m_2
m_3
m_4
m_5
m_6

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Figure A10 and Table A5: Ensemble Sequential Models
# =-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

# estimate lexicoder models 
m_1 <- summary(lm(polarity_lsd ~ lawrence + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_2 <- summary(lm(polarity_lsd ~ lawrence + goodridge + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_3 <- summary(lm(polarity_lsd ~ lawrence + goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_4 <- summary(lm(polarity_lsd ~ goodridge + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_5 <- summary(lm(polarity_lsd ~ goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_6 <- summary(lm(polarity_lsd ~ lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))

# store results
m_tuples <- matrix(NA, 10,6)
m_tuples[1,2] <- as.numeric(m_1$coefficients[2,1]) 
m_tuples[1,1] <- as.numeric(m_1$coefficients[2,1]) - (1.65 * as.numeric(m_1$coefficients[2,2]))
m_tuples[1,3] <- as.numeric(m_1$coefficients[2,1]) + (1.65 * as.numeric(m_1$coefficients[2,2]))
m_tuples[2,2] <- as.numeric(m_2$coefficients[2,1]) 
m_tuples[2,1] <- as.numeric(m_2$coefficients[2,1]) - (1.65 * as.numeric(m_2$coefficients[2,2]))
m_tuples[2,3] <- as.numeric(m_2$coefficients[2,1]) + (1.65 * as.numeric(m_2$coefficients[2,2]))
m_tuples[3,2] <- as.numeric(m_3$coefficients[2,1]) 
m_tuples[3,1] <- as.numeric(m_3$coefficients[2,1]) - (1.65 * as.numeric(m_3$coefficients[2,2]))
m_tuples[3,3] <- as.numeric(m_3$coefficients[2,1]) + (1.65 * as.numeric(m_3$coefficients[2,2]))
m_tuples[4,2] <- as.numeric(m_2$coefficients[3,1]) 
m_tuples[4,1] <- as.numeric(m_2$coefficients[3,1]) - (1.65 * as.numeric(m_2$coefficients[3,2]))
m_tuples[4,3] <- as.numeric(m_2$coefficients[3,1]) + (1.65 * as.numeric(m_2$coefficients[3,2]))
m_tuples[5,2] <- as.numeric(m_3$coefficients[3,1]) 
m_tuples[5,1] <- as.numeric(m_3$coefficients[3,1]) - (1.65 * as.numeric(m_3$coefficients[3,2]))
m_tuples[5,3] <- as.numeric(m_3$coefficients[3,1]) + (1.65 * as.numeric(m_3$coefficients[3,2]))
m_tuples[6,2] <- as.numeric(m_3$coefficients[4,1]) 
m_tuples[6,1] <- as.numeric(m_3$coefficients[4,1]) - (1.65 * as.numeric(m_3$coefficients[4,2]))
m_tuples[6,3] <- as.numeric(m_3$coefficients[4,1]) + (1.65 * as.numeric(m_3$coefficients[4,2]))
m_tuples[7,2] <- as.numeric(m_4$coefficients[2,1]) 
m_tuples[7,1] <- as.numeric(m_4$coefficients[2,1]) - (1.65 * as.numeric(m_4$coefficients[2,2]))
m_tuples[7,3] <- as.numeric(m_4$coefficients[2,1]) + (1.65 * as.numeric(m_4$coefficients[2,2]))
m_tuples[8,2] <- as.numeric(m_5$coefficients[2,1]) 
m_tuples[8,1] <- as.numeric(m_5$coefficients[2,1]) - (1.65 * as.numeric(m_5$coefficients[2,2]))
m_tuples[8,3] <- as.numeric(m_5$coefficients[2,1]) + (1.65 * as.numeric(m_5$coefficients[2,2]))
m_tuples[9,2] <- as.numeric(m_5$coefficients[3,1]) 
m_tuples[9,1] <- as.numeric(m_5$coefficients[3,1]) - (1.65 * as.numeric(m_5$coefficients[3,2]))
m_tuples[9,3] <- as.numeric(m_5$coefficients[3,1]) + (1.65 * as.numeric(m_5$coefficients[3,2]))
m_tuples[10,2] <- as.numeric(m_6$coefficients[2,1]) 
m_tuples[10,1] <- as.numeric(m_6$coefficients[2,1]) - (1.65 * as.numeric(m_6$coefficients[2,2]))
m_tuples[10,3] <- as.numeric(m_6$coefficients[2,1]) + (1.65 * as.numeric(m_6$coefficients[2,2]))

m_tuples[,4] <- "Lexicoder"
m_tuples[,5] <- c("Lawrence", "Lawrence + Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge + Lofton", "Goodridge", "Goodridge + Lofton", "Goodridge + Lofton", "Lofton")
m_tuples[,6] <- c("Lawrence", "Lawrence", "Lawrence", "Goodridge", "Goodridge", "Lofton", "Goodridge", "Goodridge", "Lofton", "Lofton")


# store as dataframe for plotting
plotData <- as.data.frame(m_tuples)
colnames(plotData) <- c("Lower", "Estimate", "Upper", "Dictionary", "Model", "Covariate")
plotData$Estimate <- as.numeric(as.character(plotData$Estimate))
plotData$Lower <- as.numeric(as.character(plotData$Lower))
plotData$Upper <- as.numeric(as.character(plotData$Upper))
plotData$Model <- as.factor(plotData$Model)
plotData$Model <- ordered(plotData$Model, levels = c("Lofton", "Goodridge + Lofton", "Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge", "Lawrence"))
plotData$Covariate <- as.factor(plotData$Covariate)
plotData$Covariate <- ordered(plotData$Covariate, levels = c("Lawrence", "Goodridge", "Lofton"))

# Figure A10: Sequential Models of Lexicoder Tone
pdf("figA10_seqmodels_lexicoder.pdf", height=6, width=4)
plotData %>% 
  ggplot() + 
  geom_point(aes(x=Estimate,y=Model)) +
  geom_segment(aes(x=Lower, xend=Upper, y=Model, yend=Model)) +
  geom_vline(xintercept=0, color = "darkgrey") +
  xlim(-.31,.31) +
  facet_wrap(~Covariate, ncol = 1) +
  theme_bw()
dev.off()

# Table A5
m_1
m_2
m_3
m_4
m_5
m_6

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Figure A11 and Table A6: NRC Sequential Models
# =-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

# nrc sequence
m_1 <- summary(lm(polarity_nrc ~ lawrence + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_2 <- summary(lm(polarity_nrc ~ lawrence + goodridge + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_3 <- summary(lm(polarity_nrc ~ lawrence + goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_4 <- summary(lm(polarity_nrc ~ goodridge + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_5 <- summary(lm(polarity_nrc ~ goodridge + lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))
m_6 <- summary(lm(polarity_nrc ~ lofton + totalPages + s(dateCount,5) + erie + us + rainbow + tapestry, data = sentimentDfm))

# store results
m_tuples <- matrix(NA, 10,6)
m_tuples[1,2] <- as.numeric(m_1$coefficients[2,1]) 
m_tuples[1,1] <- as.numeric(m_1$coefficients[2,1]) - (1.65 * as.numeric(m_1$coefficients[2,2]))
m_tuples[1,3] <- as.numeric(m_1$coefficients[2,1]) + (1.65 * as.numeric(m_1$coefficients[2,2]))
m_tuples[2,2] <- as.numeric(m_2$coefficients[2,1]) 
m_tuples[2,1] <- as.numeric(m_2$coefficients[2,1]) - (1.65 * as.numeric(m_2$coefficients[2,2]))
m_tuples[2,3] <- as.numeric(m_2$coefficients[2,1]) + (1.65 * as.numeric(m_2$coefficients[2,2]))
m_tuples[3,2] <- as.numeric(m_3$coefficients[2,1]) 
m_tuples[3,1] <- as.numeric(m_3$coefficients[2,1]) - (1.65 * as.numeric(m_3$coefficients[2,2]))
m_tuples[3,3] <- as.numeric(m_3$coefficients[2,1]) + (1.65 * as.numeric(m_3$coefficients[2,2]))
m_tuples[4,2] <- as.numeric(m_2$coefficients[3,1]) 
m_tuples[4,1] <- as.numeric(m_2$coefficients[3,1]) - (1.65 * as.numeric(m_2$coefficients[3,2]))
m_tuples[4,3] <- as.numeric(m_2$coefficients[3,1]) + (1.65 * as.numeric(m_2$coefficients[3,2]))
m_tuples[5,2] <- as.numeric(m_3$coefficients[3,1]) 
m_tuples[5,1] <- as.numeric(m_3$coefficients[3,1]) - (1.65 * as.numeric(m_3$coefficients[3,2]))
m_tuples[5,3] <- as.numeric(m_3$coefficients[3,1]) + (1.65 * as.numeric(m_3$coefficients[3,2]))
m_tuples[6,2] <- as.numeric(m_3$coefficients[4,1]) 
m_tuples[6,1] <- as.numeric(m_3$coefficients[4,1]) - (1.65 * as.numeric(m_3$coefficients[4,2]))
m_tuples[6,3] <- as.numeric(m_3$coefficients[4,1]) + (1.65 * as.numeric(m_3$coefficients[4,2]))
m_tuples[7,2] <- as.numeric(m_4$coefficients[2,1]) 
m_tuples[7,1] <- as.numeric(m_4$coefficients[2,1]) - (1.65 * as.numeric(m_4$coefficients[2,2]))
m_tuples[7,3] <- as.numeric(m_4$coefficients[2,1]) + (1.65 * as.numeric(m_4$coefficients[2,2]))
m_tuples[8,2] <- as.numeric(m_5$coefficients[2,1]) 
m_tuples[8,1] <- as.numeric(m_5$coefficients[2,1]) - (1.65 * as.numeric(m_5$coefficients[2,2]))
m_tuples[8,3] <- as.numeric(m_5$coefficients[2,1]) + (1.65 * as.numeric(m_5$coefficients[2,2]))
m_tuples[9,2] <- as.numeric(m_5$coefficients[3,1]) 
m_tuples[9,1] <- as.numeric(m_5$coefficients[3,1]) - (1.65 * as.numeric(m_5$coefficients[3,2]))
m_tuples[9,3] <- as.numeric(m_5$coefficients[3,1]) + (1.65 * as.numeric(m_5$coefficients[3,2]))
m_tuples[10,2] <- as.numeric(m_6$coefficients[2,1]) 
m_tuples[10,1] <- as.numeric(m_6$coefficients[2,1]) - (1.65 * as.numeric(m_6$coefficients[2,2]))
m_tuples[10,3] <- as.numeric(m_6$coefficients[2,1]) + (1.65 * as.numeric(m_6$coefficients[2,2]))


m_tuples[,4] <- "GenInq"
m_tuples[,5] <- c("Lawrence", "Lawrence + Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge + Lofton", "Goodridge", "Goodridge + Lofton", "Goodridge + Lofton", "Lofton")
m_tuples[,6] <- c("Lawrence", "Lawrence", "Lawrence", "Goodridge", "Goodridge", "Lofton", "Goodridge", "Goodridge", "Lofton", "Lofton")


# store as dataframe for plotting
plotData <- as.data.frame(m_tuples)
colnames(plotData) <- c("Lower", "Estimate", "Upper", "Dictionary", "Model", "Covariate")
plotData$Estimate <- as.numeric(as.character(plotData$Estimate))
plotData$Lower <- as.numeric(as.character(plotData$Lower))
plotData$Upper <- as.numeric(as.character(plotData$Upper))
plotData$Model <- as.factor(plotData$Model)
plotData$Model <- ordered(plotData$Model, levels = c("Lofton", "Goodridge + Lofton", "Goodridge", "Lawrence + Goodridge + Lofton", "Lawrence + Goodridge", "Lawrence"))
plotData$Covariate <- as.factor(plotData$Covariate)
plotData$Covariate <- ordered(plotData$Covariate, levels = c("Lawrence", "Goodridge", "Lofton"))

# plot all of these
pdf("figA11_seqmodels_nrc.pdf", height=6, width=4)
plotData %>% 
  ggplot() + 
  geom_point(aes(x=Estimate,y=Model)) +
  geom_segment(aes(x=Lower, xend=Upper, y=Model, yend=Model)) +
  geom_vline(xintercept=0, color = "darkgrey") +
  xlim(-.31,.31) +
  facet_wrap(~Covariate, ncol = 1) +
  theme_bw()
dev.off()

# Table A6
m_1
m_2
m_3
m_4
m_5
m_6
